From 0bb3ebdb6caeeed458936bc0a2a16898c622302e Mon Sep 17 00:00:00 2001 From: "akw27@labyrinth.cl.cam.ac.uk" Date: Tue, 8 Feb 2005 18:21:54 +0000 Subject: [PATCH] bitkeeper revision 1.1159.1.551 (42090342LHDFQZTluOIKtDxiXyfkHA) Initial checkin of blktap user-land tools. These are fairly experimental, but a few people have asked to use them. This checkin also includes Christian's gnbd client library code. Signed-off-by: andrew.warfield@cl.cam.ac.uk --- .rootkeys | 24 + tools/blktap/Makefile | 100 ++++ tools/blktap/README | 149 ++++++ tools/blktap/blkaio.c | 19 + tools/blktap/blkaiolib.c | 489 +++++++++++++++++++ tools/blktap/blkaiolib.h | 16 + tools/blktap/blkcow.c | 31 ++ tools/blktap/blkcowgnbd.c | 24 + tools/blktap/blkcowimg.c | 24 + tools/blktap/blkcowlib.c | 380 +++++++++++++++ tools/blktap/blkcowlib.h | 14 + tools/blktap/blkdump.c | 151 ++++++ tools/blktap/blkgnbd.c | 19 + tools/blktap/blkgnbdlib.c | 471 +++++++++++++++++++ tools/blktap/blkgnbdlib.h | 16 + tools/blktap/blkimg.c | 19 + tools/blktap/blkimglib.c | 325 +++++++++++++ tools/blktap/blkimglib.h | 16 + tools/blktap/blkint.h | 105 +++++ tools/blktap/blktaplib.c | 542 +++++++++++++++++++++ tools/blktap/blktaplib.h | 76 +++ tools/blktap/libgnbd/Makefile | 8 + tools/blktap/libgnbd/gnbdtest.c | 90 ++++ tools/blktap/libgnbd/libgnbd.c | 647 ++++++++++++++++++++++++++ tools/blktap/libgnbd/libgnbd.h | 25 + tools/python/xen/xend/server/blkif.py | 14 + xen/include/public/io/blkif.h | 2 +- 27 files changed, 3795 insertions(+), 1 deletion(-) create mode 100644 tools/blktap/Makefile create mode 100644 tools/blktap/README create mode 100644 tools/blktap/blkaio.c create mode 100644 tools/blktap/blkaiolib.c create mode 100644 tools/blktap/blkaiolib.h create mode 100644 tools/blktap/blkcow.c create mode 100644 tools/blktap/blkcowgnbd.c create mode 100644 tools/blktap/blkcowimg.c create mode 100644 tools/blktap/blkcowlib.c create mode 100644 tools/blktap/blkcowlib.h create mode 100644 tools/blktap/blkdump.c create mode 
100644 tools/blktap/blkgnbd.c create mode 100644 tools/blktap/blkgnbdlib.c create mode 100644 tools/blktap/blkgnbdlib.h create mode 100644 tools/blktap/blkimg.c create mode 100644 tools/blktap/blkimglib.c create mode 100644 tools/blktap/blkimglib.h create mode 100644 tools/blktap/blkint.h create mode 100644 tools/blktap/blktaplib.c create mode 100644 tools/blktap/blktaplib.h create mode 100644 tools/blktap/libgnbd/Makefile create mode 100644 tools/blktap/libgnbd/gnbdtest.c create mode 100644 tools/blktap/libgnbd/libgnbd.c create mode 100644 tools/blktap/libgnbd/libgnbd.h diff --git a/.rootkeys b/.rootkeys index e3c0b4fdb0..97ace4a85e 100644 --- a/.rootkeys +++ b/.rootkeys @@ -315,6 +315,30 @@ 413aa1d0oNP8HXLvfPuMe6cSroUfSA patches/linux-2.6.9/agpgart.patch 3f776bd1Hy9rn69ntXBhPReUFw9IEA tools/Makefile 40e1b09db5mN69Ijj0X_Eol-S7dXiw tools/Rules.mk +4209033eUwhDBJ_bxejiv5c6gjXS4A tools/blktap/Makefile +4209033ewLAHdhGrT_2jo3Gb_5bDcA tools/blktap/README +4209033eX_Xw94wHaOCtnU9nOAtSJA tools/blktap/blkaio.c +4209033egwf6LDxM2hbaqi9rRdZy4A tools/blktap/blkaiolib.c +4209033f9yELLK85Ipo2oKjr3ickgQ tools/blktap/blkaiolib.h +4209033fL9LcSI6LXrIp5O4axbUBLg tools/blktap/blkcow.c +4209033fUDlFGZreIyZHdP7h7yfvuQ tools/blktap/blkcowgnbd.c +4209033fCgZzLeMOwNBFmsp99x58ZQ tools/blktap/blkcowimg.c +4209033frfXH6oOi9AvRz08PPAndNA tools/blktap/blkcowlib.c +4209033fhFd_y2go9HgCF395A35xJg tools/blktap/blkcowlib.h +4209033fHgtGpb_K16_xC9CpkjNZLw tools/blktap/blkdump.c +4209033fm61CZG1RyKDW75V-eTZ9fg tools/blktap/blkgnbd.c +4209033fVfa-R6MFgGcmsQHTDna4PA tools/blktap/blkgnbdlib.c +4209033fIgDQbaHwHStHhPEDTtbqsA tools/blktap/blkgnbdlib.h +4209033figp5JRsKsXY8rw4keRumkg tools/blktap/blkimg.c +42090340V-8HKGlr00SyJGsE5jXC3A tools/blktap/blkimglib.c +42090340c7pQbh0Km8zLcEqPd_3zIg tools/blktap/blkimglib.h +42090340_mvZtozMjghPJO0qsjk4NQ tools/blktap/blkint.h +42090340rc2q1wmlGn6HtiJAkqhtNQ tools/blktap/blktaplib.c +42090340C-WkRPT7N3t-8Lzehzogdw tools/blktap/blktaplib.h 
+42090340B3mDvcxvd9ehDHUkg46hvw tools/blktap/libgnbd/Makefile +42090340ZWkc5Xhf9lpQmDON8HJXww tools/blktap/libgnbd/gnbdtest.c +42090340ocMiUScJE3OpY7QNunvSbg tools/blktap/libgnbd/libgnbd.c +42090340G5_F_EeVnPORKB0pTMGGhA tools/blktap/libgnbd/libgnbd.h 4124b307nRyK3dhn1hAsvrY76NuV3g tools/check/Makefile 4124b307vHLUWbfpemVefmaWDcdfag tools/check/README 4124b307jt7T3CHysgl9LijNHSe1tA tools/check/check_brctl diff --git a/tools/blktap/Makefile b/tools/blktap/Makefile new file mode 100644 index 0000000000..389095e68c --- /dev/null +++ b/tools/blktap/Makefile @@ -0,0 +1,100 @@ +MAJOR = 2.0 +MINOR = 0 +SONAME = libblktap.so.$(MAJOR) + +CC = gcc + +XEN_ROOT = ../.. +include $(XEN_ROOT)/tools/Rules.mk + +INCLUDES += + +SRCS := +SRCS += blktaplib.c + +CFLAGS += -Wall +CFLAGS += -Werror +CFLAGS += -Wno-unused +#CFLAGS += -O3 +CFLAGS += -g3 +CFLAGS += -fno-strict-aliasing +CFLAGS += -I $(XEN_LIBXC) +CFLAGS += -I $(XEN_LIBXUTIL) +CFLAGS += $(INCLUDES) -I. +CFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE +# Get gcc to generate the dependencies for us. +CFLAGS += -Wp,-MD,.$(@F).d +DEPS = .*.d + +OBJS = $(patsubst %.c,%.o,$(SRCS)) + +LIB = libblktap.so libblktap.so.$(MAJOR) libblktap.so.$(MAJOR).$(MINOR) + +all: mk-symlinks blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio + $(MAKE) $(LIB) + +LINUX_ROOT := $(wildcard $(XEN_ROOT)/linux-2.6.*-xen-sparse) +mk-symlinks: + [ -e xen/linux ] || mkdir -p xen/linux + [ -e xen/io ] || mkdir -p xen/io + ( cd xen >/dev/null ; \ + ln -sf ../$(XEN_ROOT)/xen/include/public/*.h . ) + ( cd xen/io >/dev/null ; \ + ln -sf ../../$(XEN_ROOT)/xen/include/public/io/*.h . ) + ( cd xen/linux >/dev/null ; \ + ln -sf ../../$(LINUX_ROOT)/include/asm-xen/linux-public/*.h . 
) + +install: all + mkdir -p $(prefix)/usr/lib + mkdir -p $(prefix)/usr/include + install -m0755 $(LIB) $(prefix)/usr/lib + ln -sf libblktap.so.$(MAJOR).$(MINOR) \ + $(prefix)/usr/lib/libblktap.so.$(MAJOR) + ln -sf libblktap.so.$(MAJOR) $(prefix)/usr/lib/libblktap.so + install -m0644 blktaplib.h $(prefix)/usr/include + +clean: + rm -rf *.a *.so *.o *.rpm $(LIB) *~ $(DEPS) xen TAGS blkdump blkcow blkimg blkcowimg blkgnbd blkcowgnbd blkaio + +rpm: all + rm -rf staging + mkdir staging + mkdir staging/i386 + rpmbuild --define "staging$$PWD/staging" --define '_builddir.' \ + --define "_rpmdir$$PWD/staging" -bb rpm.spec + mv staging/i386/*.rpm . + rm -rf staging + +libblktap.so: + ln -sf libblktap.so.$(MAJOR) $@ +libblktap.so.$(MAJOR): + ln -sf libblktap.so.$(MAJOR).$(MINOR) $@ +libblktap.so.$(MAJOR).$(MINOR): $(OBJS) + $(CC) -Wl,-soname -Wl,$(SONAME) -shared -o $@ $^ -L../libxutil -lxutil -lz + +# Each tool lists the sources its recipe compiles, so editing one triggers a rebuild. +blkdump: $(LIB) blkdump.c + $(CC) $(CFLAGS) -o blkdump -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkdump.c + +blkcowimg: $(LIB) blkcowimg.c blkcowlib.c blkimglib.c + $(CC) $(CFLAGS) -o blkcowimg -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcowimg.c blkimglib.c blkcowlib.c + +blkcow: $(LIB) blkcow.c blkcowlib.c + $(CC) $(CFLAGS) -o blkcow -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkcow.c blkcowlib.c + +blkimg: $(LIB) blkimg.c blkimglib.c + $(CC) $(CFLAGS) -o blkimg -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -l blktap blkimg.c blkimglib.c + +blkgnbd: $(LIB) blkgnbd.c blkgnbdlib.c + $(CC) $(CFLAGS) -o blkgnbd -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkgnbd.c blkgnbdlib.c libgnbd/libgnbd.a + +# The recipe compiles blkcowgnbd.c (not blkgnbd.c), so that is the prerequisite. +blkcowgnbd: $(LIB) blkcowgnbd.c blkcowlib.c blkgnbdlib.c + $(CC) $(CFLAGS) -o blkcowgnbd -ldb -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L. -lblktap blkcowgnbd.c blkgnbdlib.c blkcowlib.c libgnbd/libgnbd.a + +blkaio: $(LIB) blkaio.c blkaiolib.c + $(CC) $(CFLAGS) -o blkaio -L$(XEN_LIBXC) -L$(XEN_LIBXUTIL) -L.
-lblktap blkaio.c blkaiolib.c -laio -lpthread + +.PHONY: TAGS clean install mk-symlinks rpm +TAGS: + etags -t $(SRCS) *.h + +-include $(DEPS) diff --git a/tools/blktap/README b/tools/blktap/README new file mode 100644 index 0000000000..cca9a28fd9 --- /dev/null +++ b/tools/blktap/README @@ -0,0 +1,149 @@ +Block Tap User-level Interfaces +Andrew Warfield +andrew.warfield@cl.cam.ac.uk +February 8, 2005 + +NOTE #1: The blktap is _experimental_ code. It works for me. Your +mileage may vary. Don't use it for anything important. Please. ;) + +NOTE #2: All of the interfaces here are likely to change. This is all +early code, and I am checking it in because others want to play with +it. If you use it for anything, please let me know! + +Overview: +--------- + +This directory contains a library and set of example applications for +the block tap device. The block tap hooks into the split block device +interfaces above Xen allowing them to be extended. This extension can +be done in userspace with the help of a library. + +The tap can be installed either as an interposition domain in between +a frontend and backend driver pair, or as a terminating backend, in +which case it is responsible for serving all requests itself. + +There are two reasons that you might want to use the tap, +corresponding to these configurations: + + 1. To examine or modify a stream of block requests while they are + in-flight (e.g. to encrypt data, or add data-driven watchpoints) + + 2. To prototype a new backend driver, serving requests from the tap + rather than passing them along to the XenLinux blkback driver. + (e.g. to forward block requests to a remote host) + + +Interface: +---------- + +At the moment, the tap interface is similar in spirit to that of the +Linux netfilter. Requests are messages from a client (frontend) +domain to a disk (backend) domain. Responses are messages travelling +back, acknowledging the completion of a request. 
The library allows +chains of functions to be attached to these events. In addition, +hooks may be attached to handle control messages, which signify things +like connections from new domains. + +At present the control messages especially expose a lot of the +underlying driver interfaces. This may change in the future in order +to simplify writing hooks. + +Here are the public interfaces: + +These allow hook functions to be chained: + + void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); + void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); + void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); + +This allows a response to be injected, in the case where a request has +been removed using BLKTAP_STOLEN. + + void blktap_inject_response(blkif_response_t *); + +These let you add file descriptors and handlers to the main poll loop: + + int blktap_attach_poll(int fd, short events, int (*func)(int)); + void blktap_detach_poll(int fd); + +This starts the main poll loop: + + int blktap_listen(void); + +Example: +-------- + +blkimg.c uses an image on the local file system to serve requests to +a domain. Here's what it looks like: + +---[blkimg.c]--- + +/* blkimg.c + * + * file-backed disk. + */ + +#include "blktaplib.h" +#include "blkimglib.h" + + +int main(int argc, char *argv[]) +{ + image_init(); + + blktap_register_ctrl_hook("image_control", image_control); + blktap_register_request_hook("image_request", image_request); + blktap_listen(); + + return 0; +} + +---------------- + +All of the real work is in blkimglib.c, but this illustrates the +actual tap interface well enough. image_control() will be called with +all control messages. image_request() handles requests. As it reads +from an on-disk image file, no requests are ever passed on to a +backend, and so there will be no responses to process -- so there is +nothing registered as a response hook.
+ +Other examples: +--------------- + +Here is a list of other examples in the directory: + +Things that terminate a block request stream: + + blkimg - Use an image file/device to serve requests + blkgnbd - Use a remote gnbd server to serve requests + blkaio - Use libaio... (DOES NOT WORK) + +Things that don't: + + blkdump - Print in-flight requests. + blkcow - Really inefficient copy-on-write disks using libdb to store + writes. + +There are examples of plugging these things together, for instance +blkcowgnbd is a read-only gnbd device with copy-on-write to a local +file. + +TODO: +----- + +- Make session tracking work. At the moment these generally just handle a + single front-end client at a time. + +- Integrate with Xend. Need to cleanly pass an image identifier in the connect + message. + +- Make an asynchronous file-io terminator. The libaio attempt is + tragically stalled because mapped foreign pages make pfn_valid fail + (they are VM_IO), and so cannot be passed to aio as targets. A + better solution may be to tear the disk interfaces out of the real + backend and expose them somehow. + +- Make CoW suck less. + +- Do something more along the lines of dynamic linking for the + plugins, so that they don't all need a new main(). diff --git a/tools/blktap/blkaio.c b/tools/blktap/blkaio.c new file mode 100644 index 0000000000..25495718a4 --- /dev/null +++ b/tools/blktap/blkaio.c @@ -0,0 +1,19 @@ +/* blkaio.c + * + * libaio-backed disk. + */ + +#include "blktaplib.h" +#include "blkaiolib.h" + + +int main(int argc, char *argv[]) +{ + aio_init(); + + blktap_register_ctrl_hook("aio_control", aio_control); + blktap_register_request_hook("aio_request", aio_request); + blktap_listen(); + + return 0; +} diff --git a/tools/blktap/blkaiolib.c b/tools/blktap/blkaiolib.c new file mode 100644 index 0000000000..4538a9eb31 --- /dev/null +++ b/tools/blktap/blkaiolib.c @@ -0,0 +1,489 @@ +/* blkaiolib.c + * + * file/device image-backed block device -- using linux libaio.
+ * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. + * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. + * + * NOTE: This doesn't work. Grrr. + */ + +#define _GNU_SOURCE +#define __USE_LARGEFILE64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "blktaplib.h" + +//#define TMP_IMAGE_FILE_NAME "/dev/sda1" +#define TMP_IMAGE_FILE_NAME "fc3.image" + +#define MAX_DOMS 1024 +#define MAX_IMGNAME_LEN 255 +#define AMORFS_DEV 61440 +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ +#define MAX_SEGMENTS_PER_REQ 11 +#define SECTOR_SHIFT 9 +#define MAX_AIO_REQS (MAX_REQUESTS * MAX_SEGMENTS_PER_REQ) + +#if 1 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +char dbg_page[4096]; + +typedef struct { + /* These need to turn into an array/rbtree for multi-disk support. */ + int fd; + u64 fsid; + char imgname[MAX_IMGNAME_LEN]; + blkif_vdev_t vdevice; +} image_t; + +/* Note on pending_reqs: I assume all reqs are queued before they start to + * get filled. so count of 0 is an unused record. 
+ */ +typedef struct { + blkif_request_t req; + int count; +} pending_req_t; + +static pending_req_t pending_list[MAX_REQUESTS]; +image_t *images[MAX_DOMS]; + +static io_context_t ctx; +static struct iocb *iocb_free[MAX_AIO_REQS]; +static int iocb_free_count; + +/* ---[ Notification mecahnism ]--------------------------------------- */ + +enum { + READ = 0, + WRITE = 1 +}; + +static int aio_notify[2]; +static volatile int aio_listening = 0; + +static struct io_event aio_events[MAX_AIO_REQS]; +static int aio_event_count = 0; + +/* this is commented out in libaio.h for some reason. */ +extern int io_queue_wait(io_context_t ctx, struct timespec *timeout); + +static void *notifier_thread(void *arg) +{ + int ret; + int msg = 0x00feeb00; + + printf("Notifier thread started.\n"); + for (;;) { + //if ((aio_listening) && ((ret = io_queue_wait(ctx, 0)) == 0)) { + if ((aio_listening) && + ((ret = io_getevents(ctx, 1, MAX_AIO_REQS, aio_events, 0)) > 0)) { + aio_event_count = ret; + printf("[Notifying! (%d)]\n", aio_event_count); + aio_listening = 0; + write(aio_notify[WRITE], &msg, sizeof(msg)); + fsync(aio_notify[WRITE]); + } else { + if (aio_listening) + printf("[io_queue_wait error! %d]\n", errno); + usleep(1000); /* Not ready to read. 
*/ + } + } +} + +/* -------------------------------------------------------------------- */ + +int aio_control(control_msg_t *msg) +{ + domid_t domid; + DB *db; + int ret; + + if (msg->type != CMSG_BLKIF_BE) + { + printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", + ((blkif_be_create_t *)msg->msg)->domid, + ((blkif_be_create_t *)msg->msg)->blkif_handle); + domid = ((blkif_be_create_t *)msg->msg)->domid; + if (images[domid] != NULL) { + printf("attempt to connect from an existing dom!\n"); + return 0; + } + + images[domid] = (image_t *)malloc(sizeof(image_t)); + if (images[domid] == NULL) { + printf("error allocating image record.\n"); + return 0; + } + + images[domid]->fd = -1; + images[domid]->fsid = 0; + + printf("Image connected.\n"); + break; + + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", + ((blkif_be_destroy_t *)msg->msg)->domid, + ((blkif_be_destroy_t *)msg->msg)->blkif_handle); + + domid = ((blkif_be_destroy_t *)msg->msg)->domid; + if (images[domid] != NULL) { + if (images[domid]->fd != -1) + close( images[domid]->fd ); + free( images[domid] ); + images[domid] = NULL; + } + break; + case CMSG_BLKIF_BE_VBD_GROW: + { + blkif_be_vbd_grow_t *grow; + + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_grow_t *)msg->msg)->domid, + ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); + printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, + ((blkif_be_vbd_grow_t 
*)msg->msg)->extent.device); + grow = (blkif_be_vbd_grow_t *)msg->msg; + domid = grow->domid; + if (images[domid] == NULL) { + printf("VBD_GROW on unconnected domain!\n"); + return 0; + } + + if (grow->extent.device != AMORFS_DEV) { + printf("VBD_GROW on non-amorfs device!\n"); + return 0; + } + + /* TODO: config support for arbitrary image files/modes. */ + sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME); + + images[domid]->fsid = grow->extent.sector_start; + images[domid]->vdevice = grow->vdevice; + images[domid]->fd = open(TMP_IMAGE_FILE_NAME, + O_RDWR | O_DIRECT | O_LARGEFILE); + if (images[domid]->fd < 0) { + printf("Couldn't open image file! %d\n", errno); + return 0; + } + + printf("Image file opened. (%s)\n", images[domid]->imgname); + break; + } + } + return 0; +parse_error: + printf("Bad control message!\n"); + return 0; + +create_failed: + /* TODO: close the db ref. */ + return 0; +} + +int aio_request(blkif_request_t *req) +{ + int fd; + u64 sector; + char *spage, *dpage; + int ret, i, idx; + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + + if ((images[dom] == NULL) || (images[dom]->fd == -1)) { + printf("Data request for unknown domain!!! %d\n", dom); + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; + } + + fd = images[dom]->fd; + + switch (req->operation) + { + case BLKIF_OP_PROBE: + { + struct stat stat; + vdisk_t *img_info; + + + /* We expect one buffer only. */ + if ( req->nr_segments != 1 ) + goto err; + + /* Make sure the buffer is page-sized. */ + if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || + (blkif_last_sect (req->frame_and_sects[0]) != 7) ) + goto err; + + /* loop for multiple images would start here. 
*/ + + ret = fstat(fd, &stat); + if (ret != 0) { + printf("Couldn't stat image in PROBE!\n"); + goto err; + } + + img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); + img_info[0].device = images[dom]->vdevice; + img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT); + + if (img_info[0].capacity == 0) + img_info[0].capacity = ((u64)1 << 63); // xend does this too. + + DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device, + img_info[0].capacity); + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_PROBE; + rsp->status = 1; /* number of disks */ + + return BLKTAP_RESPOND; + } + case BLKIF_OP_WRITE: + { + unsigned long size; + struct iocb *io; + struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; + + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT)); + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + /*convert size and sector to byte offsets */ + size <<= SECTOR_SHIFT; + sector <<= SECTOR_SHIFT; + + io = iocb_free[--iocb_free_count]; + io_prep_pwrite(io, fd, spage, size, sector); + io->data = (void *)idx; + ioq[i] = io; + } + + ret = io_submit(ctx, req->nr_segments, ioq); + if (ret < 0) + printf("BADNESS: io_submit error! 
(%d)\n", errno); + + pending_list[idx].count = req->nr_segments; + + return BLKTAP_STOLEN; + + } + case BLKIF_OP_READ: + { + unsigned long size; + struct iocb *io; + struct iocb *ioq[MAX_SEGMENTS_PER_REQ]; + + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + + DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) " + "pos: %15lu dpage: %p\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT), dpage); + + /*convert size and sector to byte offsets */ + size <<= SECTOR_SHIFT; + sector <<= SECTOR_SHIFT; + + io = iocb_free[--iocb_free_count]; + + io_prep_pread(io, fd, dpage, size, sector); + io->data = (void *)idx; + + ioq[i] = io; + } + + ret = io_submit(ctx, req->nr_segments, ioq); + if (ret < 0) + printf("BADNESS: io_submit error! (%d)\n", errno); + + + return BLKTAP_STOLEN; + + } + } + + printf("Unknown block operation!\n"); +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + + +int aio_pollhook(int fd) +{ + struct io_event *ep; + int n, ret, idx; + blkif_request_t *req; + blkif_response_t *rsp; + + DPRINTF("aio_hook(): \n"); + + for (ep = aio_events; aio_event_count-- > 0; ep++) { + struct iocb *io = ep->obj; + idx = (int) ep->data; + + if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ + printf("gnbd returned a bad cookie (%u)!\n", idx); + break; + } + + if ((int)ep->res < 0) printf("aio request error! 
(%d,%d)\n", + (int)ep->res, (int)ep->res2); + + pending_list[idx].count--; + iocb_free[iocb_free_count++] = io; + + if (pending_list[idx].count == 0) { + blkif_request_t tmp = pending_list[idx].req; + rsp = (blkif_response_t *)&pending_list[idx].req; + rsp->id = tmp.id; + rsp->operation = tmp.operation; + rsp->status = BLKIF_RSP_OKAY; + blktap_inject_response(rsp); + } + } + + printf("pollhook done!\n"); + + read(aio_notify[READ], &idx, sizeof(idx)); + aio_listening = 1; + + return 0; +} + +/* the image library terminates the request stream. _resp is a noop. */ +int aio_response(blkif_response_t *rsp) +{ + return BLKTAP_PASS; +} + +void aio_init(void) +{ + int i, rc; + pthread_t p; + + for (i = 0; i < MAX_DOMS; i++) + images[i] = NULL; + + for (i = 0; i < MAX_REQUESTS; i++) + pending_list[i].count = 0; + + memset(&ctx, 0, sizeof(ctx)); + rc = io_queue_init(MAX_AIO_REQS, &ctx); + if (rc != 0) { + printf("queue_init failed! (%d)\n", rc); + exit(0); + } + + for (i=0; i +#include +#include +#include +#include "blktaplib.h" + +#define MAX_DOMS 1024 +#define MAX_DBNAME_LEN 255 +#define AMORFS_DEV 61440 +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ + +#if 0 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) 
((void)0) +#endif + +/* Berkeley db has different params for open() after 4.1 */ +#ifndef DB_VERSION_MAJOR +# define DB_VERSION_MAJOR 1 +#endif /* DB_VERSION_MAJOR */ +#ifndef DB_VERSION_MINOR +# define DB_VERSION_MINOR 0 +#endif /* DB_VERSION_MINOR */ + +typedef struct { + DB *db; + u64 fsid; + char dbname[MAX_DBNAME_LEN]; +} cow_t; + +cow_t *cows[MAX_DOMS]; +blkif_request_t *reread_list[MAX_REQUESTS]; + +int cow_control(control_msg_t *msg) +{ + domid_t domid; + DB *db; + int ret; + + if (msg->type != CMSG_BLKIF_BE) + { + printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", + ((blkif_be_create_t *)msg->msg)->domid, + ((blkif_be_create_t *)msg->msg)->blkif_handle); + domid = ((blkif_be_create_t *)msg->msg)->domid; + if (cows[domid] != NULL) { + printf("attempt to connect from an existing dom!\n"); + return 0; + } + + cows[domid] = (cow_t *)malloc(sizeof(cow_t)); + if (cows[domid] == NULL) { + printf("error allocating cow.\n"); + return 0; + } + + cows[domid]->db = NULL; + cows[domid]->fsid = 0; + + printf("COW connected.\n"); + break; + + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", + ((blkif_be_destroy_t *)msg->msg)->domid, + ((blkif_be_destroy_t *)msg->msg)->blkif_handle); + + domid = ((blkif_be_destroy_t *)msg->msg)->domid; + if (cows[domid] != NULL) { + if (cows[domid]->db != NULL) + cows[domid]->db->close(cows[domid]->db, 0); + free(cows[domid]); + cows[domid] = NULL; + } + break; + case CMSG_BLKIF_BE_VBD_GROW: + { + blkif_be_vbd_grow_t *grow; + + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_grow_t *)msg->msg)->domid, + 
((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); + printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); + grow = (blkif_be_vbd_grow_t *)msg->msg; + domid = grow->domid; + if (cows[domid] == NULL) { + printf("VBD_GROW on unconnected domain!\n"); + return 0; + } + + if (grow->extent.device != AMORFS_DEV) { + printf("VBD_GROW on non-amorfs device!\n"); + return 0; + } + + sprintf(&cows[domid]->dbname[0], "%020llu.db", + grow->extent.sector_start); + + cows[domid]->fsid = grow->extent.sector_start; + + if ((ret = db_create(&db, NULL, 0)) != 0) { + fprintf(stderr, "db_create: %s\n", db_strerror(ret)); + return 0; + } + + +#if DB_VERSION_MAJOR < 4 || (DB_VERSION_MAJOR == 4 && DB_VERSION_MINOR < 1) + + if ((ret = db->open( db, cows[domid]->dbname, NULL, DB_BTREE, + DB_CREATE, 0664)) != 0) { + +#else /* DB_VERSION >= 4.1 */ + + if ((ret = db->open( db, NULL, cows[domid]->dbname, NULL, DB_BTREE, + DB_CREATE, 0664)) != 0) { + +#endif /* DB_VERSION < 4.1 */ + + db->err(db, ret, "%s", cows[domid]->dbname); + goto create_failed; + } + cows[domid]->db = db; + printf("Overlay db opened. (%s)\n", cows[domid]->dbname); + break; + } + } + return 0; +parse_error: + printf("Bad control message!\n"); + return 0; + +create_failed: + /* TODO: close the db ref. */ + return 0; +} + +int cow_request(blkif_request_t *req) +{ + DB *db; + DBT key, data; + u64 sector; + char *spage, *dpage; + int ret, i, idx; + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + + if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) { + printf("Data request for unknown domain!!! 
%d\n", dom); + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; + } + + db = cows[dom]->db; + + switch (req->operation) + { + case BLKIF_OP_PROBE: +/* debug -- delete */ +idx = ID_TO_IDX(req->id); +reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req)); +memcpy(reread_list[idx], req, sizeof(*req)); + return BLKTAP_PASS; + + case BLKIF_OP_WRITE: + /* Store each segment as one PAGE_SIZE record keyed by its sector number. */ + for (i = 0; i < req->nr_segments; i++) { + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + sector = req->sector_number + (8*i); + key.data = &sector; + key.size = sizeof(sector); + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + data.data = spage; + data.size = PAGE_SIZE; + + + DPRINTF("cWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << 9)); + + if ((ret = db->put(db, NULL, &key, &data, 0)) == 0) + DPRINTF("db: %lld: key stored.\n", *((u64 *)key.data)); + else { + db->err(db, ret, "DB->put"); + goto err; + } + } + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_OKAY; + + return BLKTAP_RESPOND; + + case BLKIF_OP_READ: + /* Look each segment up in the db under the same sector-number key. */ + for (i = 0; i < req->nr_segments; i++) { + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + sector = req->sector_number + (8*i); + key.data = &sector; + key.size = sizeof(sector); + + DPRINTF("cREAD: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << 9)); + + if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) { + DPRINTF("db: %llu: key retrieved (req).\n", + *((u64 *)key.data)); + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage = data.data; + memcpy(dpage, spage, PAGE_SIZE); + + } else if (ret ==
DB_NOTFOUND) { + idx = ID_TO_IDX(req->id); + if (idx > MAX_REQUESTS) { + printf("Bad index!\n"); + goto err; + } + if (reread_list[idx] != NULL) { + printf("Dupe index!\n"); + goto err; + } + reread_list[idx] = (blkif_request_t *)malloc(sizeof(*req)); + memcpy(reread_list[idx], req, sizeof(*req)); + return BLKTAP_PASS; + } else { + db->err(db, ret, "DB->get"); + goto err; + } + } + + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_READ; + rsp->status = BLKIF_RSP_OKAY; + return BLKTAP_RESPOND; + } + + printf("Unknow block operation!\n"); + return BLKTAP_PASS; +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + +int cow_response(blkif_response_t *rsp) +{ + blkif_request_t *req; + int i, ret; + DB *db; + DBT key, data; + u64 sector; + char *spage, *dpage; + int idx = ID_TO_IDX(rsp->id); + domid_t dom; + + /* don't touch erroring responses. */ + if (rsp->status == BLKIF_RSP_ERROR) + return BLKTAP_PASS; + + if ((rsp->operation == BLKIF_OP_READ) && (reread_list[idx] != NULL)) + { + req = reread_list[idx]; + dom = ID_TO_DOM(req->id); + + if ((cows[dom] == NULL) || (cows[dom]->db == NULL)) { + printf("Response from unknown domain!!! Very badness! %d\n", dom); + return BLKTAP_PASS; + } + + db = cows[dom]->db; + + for (i = 0; i < req->nr_segments; i++) { + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + + sector = req->sector_number + (8*i); + key.data = §or; + key.size = sizeof(sector); + + if ((ret = db->get(db, NULL, &key, &data, 0)) == 0) { + printf("db: %llu: key retrieved (rsp).\n", + *((u64 *)key.data)); + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage = data.data; + memcpy(dpage, spage, PAGE_SIZE); + + } else if (ret == DB_NOTFOUND) { + continue; /* We read this from disk. 
*/ + } else { + db->err(db, ret, "DB->get"); + goto err; + } + } + free(reread_list[idx]); + reread_list[idx] = NULL; + } + + if (rsp->operation == BLKIF_OP_PROBE) { + + vdisk_t *img_info; + + req = reread_list[idx]; + img_info = (vdisk_t *)(char *)MMAP_VADDR(ID_TO_IDX(req->id), 0); + for (i =0; i < rsp->status; i++) + printf("PROBE (%d) device: 0x%04x capacity: %llu, info: 0x%04x\n", + i, + img_info[0].device, + img_info[0].capacity, + img_info[0].info); + free(reread_list[idx]); + reread_list[idx] = NULL; + } + +err: + return BLKTAP_PASS; +} + +void cow_init(void) +{ + int i; + + for (i = 0; i < MAX_DOMS; i++) + cows[i] = NULL; + + for (i = 0; i < MAX_REQUESTS; i++) + reread_list[i] = NULL; /* clear each slot; [MAX_REQUESTS] is one past the end */ +} + diff --git a/tools/blktap/blkcowlib.h b/tools/blktap/blkcowlib.h new file mode 100644 index 0000000000..e6bd7a5898 --- /dev/null +++ b/tools/blktap/blkcowlib.h @@ -0,0 +1,14 @@ +/* blkcowlib.h + * + * copy on write a block device. in a really inefficient way. + * + * (c) 2004 Andrew Warfield. + * + * public interfaces to the CoW tap. + * + */ + +int cow_control (control_msg_t *msg); +int cow_request (blkif_request_t *req); +int cow_response (blkif_response_t *rsp); +void cow_init (void); diff --git a/tools/blktap/blkdump.c b/tools/blktap/blkdump.c new file mode 100644 index 0000000000..f7cde9d89a --- /dev/null +++ b/tools/blktap/blkdump.c @@ -0,0 +1,151 @@ +/* blkdump.c + * + * show a running trace of block requests as they fly by. + * + * (c) 2004 Andrew Warfield.
+ */ + +#include +#include "blktaplib.h" + +int control_print(control_msg_t *msg) +{ + if (msg->type != CMSG_BLKIF_BE) + { + printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", + ((blkif_be_create_t *)msg->msg)->domid, + ((blkif_be_create_t *)msg->msg)->blkif_handle); + break; + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", + ((blkif_be_destroy_t *)msg->msg)->domid, + ((blkif_be_destroy_t *)msg->msg)->blkif_handle); + break; + case CMSG_BLKIF_BE_CONNECT: + if ( msg->length != sizeof(blkif_be_connect_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CONNECT(d:%d,h:%d)\n", + ((blkif_be_connect_t *)msg->msg)->domid, + ((blkif_be_connect_t *)msg->msg)->blkif_handle); + break; + case CMSG_BLKIF_BE_DISCONNECT: + if ( msg->length != sizeof(blkif_be_disconnect_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DISCONNECT(d:%d,h:%d)\n", + ((blkif_be_disconnect_t *)msg->msg)->domid, + ((blkif_be_disconnect_t *)msg->msg)->blkif_handle); + break; + case CMSG_BLKIF_BE_VBD_CREATE: + if ( msg->length != sizeof(blkif_be_vbd_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_CREATE(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_create_t *)msg->msg)->domid, + ((blkif_be_vbd_create_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_create_t *)msg->msg)->vdevice); + break; + case CMSG_BLKIF_BE_VBD_DESTROY: + if ( msg->length != sizeof(blkif_be_vbd_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_DESTROY(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_destroy_t *)msg->msg)->domid, + ((blkif_be_vbd_destroy_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_destroy_t *)msg->msg)->vdevice); + break; + case 
CMSG_BLKIF_BE_VBD_GROW: + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_grow_t *)msg->msg)->domid, + ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); + printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); + break; + case CMSG_BLKIF_BE_VBD_SHRINK: + if ( msg->length != sizeof(blkif_be_vbd_shrink_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_SHRINK(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_shrink_t *)msg->msg)->domid, + ((blkif_be_vbd_shrink_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_shrink_t *)msg->msg)->vdevice); + break; + default: + goto parse_error; + } + + return 0; + +parse_error: + printf("[CONTROL_MSG] Bad message type or length!\n"); + return 0; +} + +int request_print(blkif_request_t *req) +{ + int i; + unsigned long fas; + + if ( req->operation == BLKIF_OP_PROBE ) { + printf("[%2u:%2u<%s]\n", ID_TO_DOM(req->id), ID_TO_IDX(req->id), + blkif_op_name[req->operation]); + return BLKTAP_PASS; + } else { + printf("[%2u:%2u<%5s] (nr_segs: %03u, dev: %03u, %010llu)\n", + ID_TO_DOM(req->id), ID_TO_IDX(req->id), + blkif_op_name[req->operation], + req->nr_segments, req->device, + req->sector_number); + + + for (i=0; i < req->nr_segments; i++) { + fas = req->frame_and_sects[i]; + printf(" (pf: 0x%8lx start: %lu stop: %lu)\n", + (fas & PAGE_MASK), + blkif_first_sect(fas), + blkif_last_sect(fas) + ); + } + + } + + return BLKTAP_PASS; +} + +int response_print(blkif_response_t *rsp) +{ + if ( rsp->operation == BLKIF_OP_PROBE ) { + printf("[%2u:%2u>%s]\n", ID_TO_DOM(rsp->id), ID_TO_IDX(rsp->id), + blkif_op_name[rsp->operation]); + return BLKTAP_PASS; + } else { + printf("[%2u:%2u>%5s] (status: %d)\n", + ID_TO_DOM(rsp->id), 
ID_TO_IDX(rsp->id), + blkif_op_name[rsp->operation], + rsp->status); + + } + return BLKTAP_PASS; +} + +int main(int argc, char *argv[]) +{ + blktap_register_ctrl_hook("control_print", control_print); + blktap_register_request_hook("request_print", request_print); + blktap_register_response_hook("response_print", response_print); + blktap_listen(); + + return 0; +} diff --git a/tools/blktap/blkgnbd.c b/tools/blktap/blkgnbd.c new file mode 100644 index 0000000000..6a6bd67285 --- /dev/null +++ b/tools/blktap/blkgnbd.c @@ -0,0 +1,19 @@ +/* blkgnbd.c + * + * gnbd-backed disk. + */ + +#include "blktaplib.h" +#include "blkgnbdlib.h" + + +int main(int argc, char *argv[]) +{ + gnbd_init(); + + blktap_register_ctrl_hook("gnbd_control", gnbd_control); + blktap_register_request_hook("gnbd_request", gnbd_request); + blktap_listen(); + + return 0; +} diff --git a/tools/blktap/blkgnbdlib.c b/tools/blktap/blkgnbdlib.c new file mode 100644 index 0000000000..6eeb49c853 --- /dev/null +++ b/tools/blktap/blkgnbdlib.c @@ -0,0 +1,471 @@ +/* blkgnbdlib.c + * + * gnbd image-backed block device. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. + * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "blktaplib.h" +#include "libgnbd/libgnbd.h" + +#define GNBD_SERVER "skirmish.cl.cam.ac.uk" +#define GNBD_CLIENT "pengi-0.xeno.cl.cam.ac.uk" +#define GNBD_MOUNT "fc2_akw27" +#define GNBD_PORT 0x38e7 + +#define MAX_DOMS 1024 +#define MAX_IMGNAME_LEN 255 +#define AMORFS_DEV 61440 +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ +#define SECTOR_SHIFT 9 + +#if 0 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) 
((void)0) +#endif + +#if 1 +#define ASSERT(_p) \ + if ( !(_p) ) { printf("Assertion '%s' failed, line %d, file %s", #_p , \ + __LINE__, __FILE__); *(int*)0=0; } +#else +#define ASSERT(_p) ((void)0) +#endif + +#define GH_DISCONNECTED 0 +#define GH_PROBEWAITING 1 +#define GH_CONNECTED 2 + +typedef struct { + /* These need to turn into an array/rbtree for multi-disk support. */ + struct gnbd_handle *gh; + int gh_state; + int probe_idx; /* This really needs cleaning up after hotos. */ + int fd; + u64 fsid; + char gnbdname[MAX_IMGNAME_LEN]; + blkif_vdev_t vdevice; +} gnbd_t; + +/* Note on pending_reqs: I assume all reqs are queued before they start to + * get filled. so count of 0 is an unused record. + */ +typedef struct { + blkif_request_t req; + int count; +} pending_req_t; + +static gnbd_t *gnbds[MAX_DOMS]; +static pending_req_t pending_list[MAX_REQUESTS]; +static int pending_count = 0; /* debugging */ + + +gnbd_t *get_gnbd_by_fd(int fd) +{ + /* this is a linear scan for the moment. nees to be cleaned up for + multi-disk support. 
*/ + + int i; + + for (i=0; i< MAX_DOMS; i++) + if ((gnbds[i] != NULL) && (gnbds[i]->fd == fd)) + return gnbds[i]; + + return NULL; +} + +int gnbd_pollhook(int fd); + +int gnbd_control(control_msg_t *msg) +{ + domid_t domid; + DB *db; + int ret; + + if (msg->type != CMSG_BLKIF_BE) + { + printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", + ((blkif_be_create_t *)msg->msg)->domid, + ((blkif_be_create_t *)msg->msg)->blkif_handle); + domid = ((blkif_be_create_t *)msg->msg)->domid; + if (gnbds[domid] != NULL) { + printf("attempt to connect from an existing dom!\n"); + return 0; + } + + gnbds[domid] = (gnbd_t *)malloc(sizeof(gnbd_t)); + if (gnbds[domid] == NULL) { + printf("error allocating gnbd record.\n"); + return 0; + } + + gnbds[domid]->gh = NULL; + gnbds[domid]->fsid = 0; + + break; + + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", + ((blkif_be_destroy_t *)msg->msg)->domid, + ((blkif_be_destroy_t *)msg->msg)->blkif_handle); + + domid = ((blkif_be_destroy_t *)msg->msg)->domid; + if (gnbds[domid] != NULL) { + if (gnbds[domid]->gh != NULL) { + blktap_detach_poll(gnbds[domid]->fd); + free(gnbds[domid]->gh); /* XXX: Need a gnbd close call! 
*/; + } + free( gnbds[domid] ); + gnbds[domid] = NULL; + } + break; + case CMSG_BLKIF_BE_VBD_GROW: + { + blkif_be_vbd_grow_t *grow; + + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_grow_t *)msg->msg)->domid, + ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); + printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_start, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); + grow = (blkif_be_vbd_grow_t *)msg->msg; + domid = grow->domid; + if (gnbds[domid] == NULL) { + printf("VBD_GROW on unconnected domain!\n"); + return 0; + } + + if (grow->extent.device != AMORFS_DEV) { + printf("VBD_GROW on non-amorfs device!\n"); + return 0; + } + + /* TODO: config support for arbitrary gnbd files/modes. */ + sprintf(gnbds[domid]->gnbdname, GNBD_MOUNT); + + gnbds[domid]->fsid = grow->extent.sector_start; + gnbds[domid]->vdevice = grow->vdevice; + gnbds[domid]->gh_state = GH_DISCONNECTED; + gnbds[domid]->gh = gnbd_setup(GNBD_SERVER, GNBD_PORT, + gnbds[domid]->gnbdname, GNBD_CLIENT); + if (gnbds[domid]->gh == NULL) { + printf("Couldn't connect to gnbd mount!!\n"); + return 0; + } + gnbds[domid]->fd = gnbd_fd(gnbds[domid]->gh); + blktap_attach_poll(gnbds[domid]->fd, POLLIN, gnbd_pollhook); + + printf("gnbd mount connected. (%s)\n", gnbds[domid]->gnbdname); + break; + } + } + return 0; +parse_error: + printf("Bad control message!\n"); + return 0; + +create_failed: + /* TODO: close the db ref. */ + return 0; +} + +static int gnbd_blkif_probe(blkif_request_t *req, gnbd_t *gnbd) +{ + int fd; + struct stat stat; + vdisk_t *gnbd_info; + blkif_response_t *rsp; + + /* We expect one buffer only. */ + if ( req->nr_segments != 1 ) + goto err; + + /* Make sure the buffer is page-sized. 
*/ + if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || + (blkif_last_sect (req->frame_and_sects[0]) != 7) ) + goto err; + + /* loop for multiple gnbds would start here. */ + + gnbd_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); + gnbd_info[0].device = gnbd->vdevice; + gnbd_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + gnbd_info[0].capacity = gnbd_sectors(gnbd->gh); + + printf("[SECTORS] %llu", gnbd_info[0].capacity); + + //if (gnbd_info[0].capacity == 0) + // gnbd_info[0].capacity = ((u64)1 << 63); // xend does this too. + + DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", gnbd_info[0].device, + gnbd_info[0].capacity); + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_PROBE; + rsp->status = 1; /* number of disks */ + + return BLKTAP_RESPOND; +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + +int gnbd_request(blkif_request_t *req) +{ + struct gnbd_handle *gh; + u64 sector; + char *spage, *dpage; + int ret, i, idx; + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + + if ((gnbds[dom] == NULL) || (gnbds[dom]->gh == NULL)) { + printf("Data request for unknown domain!!! %d\n", dom); + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; + } + + gh = gnbds[dom]->gh; + + switch (req->operation) + { + case BLKIF_OP_PROBE: + { + printf("PROBE!\n"); + if ( gnbds[dom]->gh_state == GH_PROBEWAITING ) { + printf("Already have a PROBE outstanding!\n"); + goto err; + } + + if ( gnbds[dom]->gh_state == GH_DISCONNECTED ) + { + /* need to defer until we are connected. 
*/ + printf("Deferring PROBE!\n"); + idx = ID_TO_IDX(req->id); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + ASSERT(pending_list[idx].count == 0); + pending_list[idx].count = 1; + + gnbds[dom]->probe_idx = idx; + gnbds[dom]->gh_state = GH_PROBEWAITING; + + return BLKTAP_STOLEN; + } + + + return gnbd_blkif_probe(req, gnbds[dom]); + } + case BLKIF_OP_WRITE: + { + unsigned long size; + + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + pending_count++; /* dbg */ + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT)); + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + ret = gnbd_write(gh, sector, size, spage, (unsigned long)idx); + if (ret) { + printf("gnbd error on WRITE\n"); + goto err; + } + } +//printf("[WR] < %lu\n", (unsigned long)idx); + + return BLKTAP_STOLEN; + } + case BLKIF_OP_READ: + { + unsigned long size; + + idx = ID_TO_IDX(req->id); + ASSERT(pending_list[idx].count == 0); + memcpy(&pending_list[idx].req, req, sizeof(*req)); + pending_list[idx].count = req->nr_segments; + pending_count++; /* dbg */ + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << 
SECTOR_SHIFT)); + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + + ret = gnbd_read(gh, sector, size, dpage, (unsigned long)idx); + if (ret) { + printf("gnbd error on READ\n"); + goto err; + } + + } +//printf("[RD] < %lu\n", (unsigned long)idx); + + return BLKTAP_STOLEN; + } + } + + printf("Unknown block operation!\n"); +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + +/* the gnbd library terminates the request stream. _resp is a noop. */ +int gnbd_response(blkif_response_t *rsp) +{ + return BLKTAP_PASS; +} + +int gnbd_pollhook(int fd) +{ + int err; + struct gnbd_handle *gh; + blkif_request_t *req; + blkif_response_t *rsp; + unsigned long idx; + + gnbd_t *gnbd = get_gnbd_by_fd(fd); + + if (gnbd == NULL) { + printf("GNBD badness: got poll hook on unknown device. (%d)\n", fd); + return -1; + } + gh = gnbd->gh; + err = gnbd_reply(gh); + switch (err) { + case GNBD_LOGIN_DONE: + if (gnbd->gh_state == GH_PROBEWAITING) { + req = (blkif_request_t *)&pending_list[gnbd->probe_idx].req; + printf("[!] 
Sending deferred PROBE!\n"); + gnbd_blkif_probe(req, gnbd); + pending_list[gnbd->probe_idx].count = 0; + rsp = (blkif_response_t *)req; + blktap_inject_response(rsp); + } + gnbd->gh_state = GH_CONNECTED; + printf("GNBD_LOGIN_DONE (%d)\n", fd); + break; + + case GNBD_REQUEST_DONE: /* switch to idx */ + idx = gnbd_finished_request(gh); + req = (blkif_request_t *)&pending_list[idx].req; + if ((idx > MAX_REQUESTS-1) || (pending_list[idx].count == 0)){ + printf("gnbd returned a bad cookie (%lu)!\n", idx); + break; + } + + pending_list[idx].count--; + + if (pending_list[idx].count == 0) { + blkif_request_t tmp = *req; + pending_count--; /* dbg */ + rsp = (blkif_response_t *)req; + rsp->id = tmp.id; + rsp->operation = tmp.operation; + rsp->status = BLKIF_RSP_OKAY; + blktap_inject_response(rsp); +/* +if (rsp->operation == BLKIF_OP_READ) { +printf("[RD] > %lu (%d pndg)\n", (unsigned long)idx, pending_count); +} else if (rsp->operation == BLKIF_OP_WRITE) { +printf("[WR] > %lu (%d pndg)\n", (unsigned long)idx, pending_count); +} else { +printf("[??] > %lu (%d pndg)\n", (unsigned long)idx, pending_count); +} +*/ + } + break; + + case GNBD_CONTINUE: + break; + + case 0: + break; + + default: + printf("gnbd_reply error"); + break; + } + return 0; +} + +void gnbd_init(void) +{ + int i; + + for (i = 0; i < MAX_DOMS; i++) + gnbds[i] = NULL; + + for (i = 0; i < MAX_REQUESTS; i++) + pending_list[i].count = 0; + + printf("GNBD image plugin initialized\n"); +} + diff --git a/tools/blktap/blkgnbdlib.h b/tools/blktap/blkgnbdlib.h new file mode 100644 index 0000000000..b95d2409ac --- /dev/null +++ b/tools/blktap/blkgnbdlib.h @@ -0,0 +1,16 @@ +/* blkgnbdlib.h + * + * gndb image-backed block device. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. + * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. 
+ */ + +int gnbd_control(control_msg_t *msg); +int gnbd_request(blkif_request_t *req); +int gnbd_response(blkif_response_t *rsp); /* noop */ +void gnbd_init(void); diff --git a/tools/blktap/blkimg.c b/tools/blktap/blkimg.c new file mode 100644 index 0000000000..fc746add4b --- /dev/null +++ b/tools/blktap/blkimg.c @@ -0,0 +1,19 @@ +/* blkimg.c + * + * file-backed disk. + */ + +#include "blktaplib.h" +#include "blkimglib.h" + + +int main(int argc, char *argv[]) +{ + image_init(); + + blktap_register_ctrl_hook("image_control", image_control); + blktap_register_request_hook("image_request", image_request); + blktap_listen(); + + return 0; +} diff --git a/tools/blktap/blkimglib.c b/tools/blktap/blkimglib.c new file mode 100644 index 0000000000..075a2d962d --- /dev/null +++ b/tools/blktap/blkimglib.c @@ -0,0 +1,325 @@ +/* blkimglib.c + * + * file image-backed block device. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. + * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "blktaplib.h" + +//#define TMP_IMAGE_FILE_NAME "/dev/sda1" +#define TMP_IMAGE_FILE_NAME "fc3.image" + +#define MAX_DOMS 1024 +#define MAX_IMGNAME_LEN 255 +#define AMORFS_DEV 61440 +#define MAX_REQUESTS 64 /* must be synced with the blkif drivers. */ +#define SECTOR_SHIFT 9 + +#if 0 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) ((void)0) +#endif + + +typedef struct { + /* These need to turn into an array/rbtree for multi-disk support. 
*/ + FILE *img; + u64 fsid; + char imgname[MAX_IMGNAME_LEN]; + blkif_vdev_t vdevice; +} image_t; + +image_t *images[MAX_DOMS]; +blkif_request_t *reread_list[MAX_REQUESTS]; + +int image_control(control_msg_t *msg) +{ + domid_t domid; + DB *db; + int ret; + + if (msg->type != CMSG_BLKIF_BE) + { + printf("***\nUNEXPECTED CTRL MSG MAJOR TYPE(%d)\n***\n", msg->type); + return 0; + } + + switch(msg->subtype) + { + case CMSG_BLKIF_BE_CREATE: + if ( msg->length != sizeof(blkif_be_create_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_CREATE(d:%d,h:%d)\n", + ((blkif_be_create_t *)msg->msg)->domid, + ((blkif_be_create_t *)msg->msg)->blkif_handle); + domid = ((blkif_be_create_t *)msg->msg)->domid; + if (images[domid] != NULL) { + printf("attempt to connect from an existing dom!\n"); + return 0; + } + + images[domid] = (image_t *)malloc(sizeof(image_t)); + if (images[domid] == NULL) { + printf("error allocating image record.\n"); + return 0; + } + + images[domid]->img = NULL; + images[domid]->fsid = 0; + + printf("Image connected.\n"); + break; + + case CMSG_BLKIF_BE_DESTROY: + if ( msg->length != sizeof(blkif_be_destroy_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_DESTROY(d:%d,h:%d)\n", + ((blkif_be_destroy_t *)msg->msg)->domid, + ((blkif_be_destroy_t *)msg->msg)->blkif_handle); + + domid = ((blkif_be_destroy_t *)msg->msg)->domid; + if (images[domid] != NULL) { + if (images[domid]->img != NULL) + fclose( images[domid]->img ); + free( images[domid] ); + images[domid] = NULL; + } + break; + case CMSG_BLKIF_BE_VBD_GROW: + { + blkif_be_vbd_grow_t *grow; + + if ( msg->length != sizeof(blkif_be_vbd_grow_t) ) + goto parse_error; + printf("[CONTROL_MSG] CMSG_BLKIF_BE_VBD_GROW(d:%d,h:%d,v:%d)\n", + ((blkif_be_vbd_grow_t *)msg->msg)->domid, + ((blkif_be_vbd_grow_t *)msg->msg)->blkif_handle, + ((blkif_be_vbd_grow_t *)msg->msg)->vdevice); + printf(" Extent: sec_start: %llu sec_len: %llu, dev: %d\n", + ((blkif_be_vbd_grow_t 
*)msg->msg)->extent.sector_start, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.sector_length, + ((blkif_be_vbd_grow_t *)msg->msg)->extent.device); + grow = (blkif_be_vbd_grow_t *)msg->msg; + domid = grow->domid; + if (images[domid] == NULL) { + printf("VBD_GROW on unconnected domain!\n"); + return 0; + } + + if (grow->extent.device != AMORFS_DEV) { + printf("VBD_GROW on non-amorfs device!\n"); + return 0; + } + + /* TODO: config support for arbitrary image files/modes. */ + sprintf(images[domid]->imgname, TMP_IMAGE_FILE_NAME); + + images[domid]->fsid = grow->extent.sector_start; + images[domid]->vdevice = grow->vdevice; + images[domid]->img = fopen64(TMP_IMAGE_FILE_NAME, "r+"); + if (images[domid]->img == NULL) { + printf("Couldn't open image file!\n"); + return 0; + } + + printf("Image file opened. (%s)\n", images[domid]->imgname); + break; + } + } + return 0; +parse_error: + printf("Bad control message!\n"); + return 0; + +create_failed: + /* TODO: close the db ref. */ + return 0; +} + +int image_request(blkif_request_t *req) +{ + FILE *img; + u64 sector; + char *spage, *dpage; + int ret, i, idx; + blkif_response_t *rsp; + domid_t dom = ID_TO_DOM(req->id); + + if ((images[dom] == NULL) || (images[dom]->img == NULL)) { + printf("Data request for unknown domain!!! %d\n", dom); + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; + } + + img = images[dom]->img; + + switch (req->operation) + { + case BLKIF_OP_PROBE: + { + int fd; + struct stat stat; + vdisk_t *img_info; + + + /* We expect one buffer only. */ + if ( req->nr_segments != 1 ) + goto err; + + /* Make sure the buffer is page-sized. */ + if ( (blkif_first_sect(req->frame_and_sects[0]) != 0) || + (blkif_last_sect (req->frame_and_sects[0]) != 7) ) + goto err; + + /* loop for multiple images would start here. 
*/ + + fd = fileno(img); + if (fd == -1) { + printf("Couldn't get image fd in PROBE!\n"); + goto err; + } + + ret = fstat(fd, &stat); + if (ret != 0) { + printf("Couldn't stat image in PROBE!\n"); + goto err; + } + + img_info = (vdisk_t *)MMAP_VADDR(ID_TO_IDX(req->id), 0); + img_info[0].device = images[dom]->vdevice; + img_info[0].info = VDISK_TYPE_DISK | VDISK_FLAG_VIRT; + img_info[0].capacity = (stat.st_size >> SECTOR_SHIFT); + + if (img_info[0].capacity == 0) + img_info[0].capacity = ((u64)1 << 63); // xend does this too. + + DPRINTF("iPROBE! device: 0x%04x capacity: %llu\n", img_info[0].device, + img_info[0].capacity); + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_PROBE; + rsp->status = 1; /* number of disks */ + + return BLKTAP_RESPOND; + } + case BLKIF_OP_WRITE: + { + unsigned long size; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect (req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); + if (ret != 0) { + printf("fseek error on WRITE\n"); + goto err; + } + + DPRINTF("iWRITE: sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT)); + + spage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + spage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + ret = fwrite(spage, size << SECTOR_SHIFT, 1, img); + if (ret != 1) { + printf("fwrite error on WRITE (%d)\n", errno); + goto err; + } + } + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_WRITE; + rsp->status = BLKIF_RSP_OKAY; + + return BLKTAP_RESPOND; + } + case BLKIF_OP_READ: + { + unsigned long size; + + for (i = 0; i < req->nr_segments; i++) { + + sector = req->sector_number + (8*i); + + size = blkif_last_sect 
(req->frame_and_sects[i]) - + blkif_first_sect(req->frame_and_sects[i]) + 1; + + ret = fseeko64(img, (off_t)(sector << SECTOR_SHIFT), SEEK_SET); + if (ret != 0) { + printf("fseek error on READ\n"); + goto err; + } + + DPRINTF("iREAD : sec_nr: %10llu sec: %10llu (%1lu,%1lu) pos: %15lu\n", + req->sector_number, sector, + blkif_first_sect(req->frame_and_sects[i]), + blkif_last_sect (req->frame_and_sects[i]), + (long)(sector << SECTOR_SHIFT)); + + dpage = (char *)MMAP_VADDR(ID_TO_IDX(req->id), i); + dpage += blkif_first_sect(req->frame_and_sects[i]) << SECTOR_SHIFT; + ret = fread(dpage, size << SECTOR_SHIFT, 1, img); + if (ret != 1) { + printf("fread error on READ\n"); + goto err; + } + } + + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = BLKIF_OP_READ; + rsp->status = BLKIF_RSP_OKAY; + return BLKTAP_RESPOND; + } + } + + printf("Unknow block operation!\n"); +err: + rsp = (blkif_response_t *)req; + rsp->id = req->id; + rsp->operation = req->operation; + rsp->status = BLKIF_RSP_ERROR; + return BLKTAP_RESPOND; +} + +/* the image library terminates the request stream. _resp is a noop. */ +int image_response(blkif_response_t *rsp) +{ + return BLKTAP_PASS; +} + +void image_init(void) +{ + int i; + + for (i = 0; i < MAX_DOMS; i++) + images[i] = NULL; +} + diff --git a/tools/blktap/blkimglib.h b/tools/blktap/blkimglib.h new file mode 100644 index 0000000000..1bc597f233 --- /dev/null +++ b/tools/blktap/blkimglib.h @@ -0,0 +1,16 @@ +/* blkimglib.h + * + * file image-backed block device. + * + * (c) 2004 Andrew Warfield. + * + * Xend has been modified to use an amorfs:[fsid] disk tag. + * This will show up as device type (maj:240,min:0) = 61440. + * + * The fsid is placed in the sec_start field of the disk extent. 
+ */ + +int image_control(control_msg_t *msg); +int image_request(blkif_request_t *req); +int image_response(blkif_response_t *rsp); /* noop */ +void image_init(void); diff --git a/tools/blktap/blkint.h b/tools/blktap/blkint.h new file mode 100644 index 0000000000..e3ce3b55e1 --- /dev/null +++ b/tools/blktap/blkint.h @@ -0,0 +1,105 @@ +/* + * blkint.h + * + * Interfaces for the Xen block interposition driver. + * + * (c) 2004, Andrew Warfield, University of Cambridge + * + */ + +#ifndef __BLKINT_H__ + +//#include "blkif.h" + + +#if 0 +/* Types of ring. */ +#define BLKIF_REQ_RING_TYPE 1 +#define BLKIF_RSP_RING_TYPE 2 + +/* generic ring struct. */ +typedef struct blkif_generic_ring_struct { + int type; +} blkif_generic_ring_t; + +/* A requestor's view of a ring. */ +typedef struct blkif_req_ring_struct { + + int type; /* Will be BLKIF_REQ_RING_TYPE */ + BLKIF_RING_IDX req_prod; /* PRIVATE req_prod index */ + BLKIF_RING_IDX rsp_cons; /* Response consumer index */ + blkif_ring_t *ring; /* Pointer to shared ring struct */ + +} blkif_req_ring_t; + +#define BLKIF_REQ_RING_INIT { BLKIF_REQ_RING_TYPE, 0, 0, 0 } + +/* A responder's view of a ring. */ +typedef struct blkif_rsp_ring_struct { + + int type; /* Will be BLKIF_REQ_RING_TYPE */ + BLKIF_RING_IDX rsp_prod; /* PRIVATE rsp_prod index */ + BLKIF_RING_IDX req_cons; /* Request consumer index */ + blkif_ring_t *ring; /* Pointer to shared ring struct */ + +} blkif_rsp_ring_t; + +#define BLKIF_RSP_RING_INIT { BLKIF_RSP_RING_TYPE, 0, 0, 0 } + +#define RING(a) (blkif_generic_ring_t *)(a) +inline int BLKTAP_RING_FULL(blkif_generic_ring_t *ring); +#endif + +/* -------[ interposition -> character device interface ]------------- */ + +/* /dev/xen/blktap resides at device number major=10, minor=202 */ +#define BLKTAP_MINOR 202 + +/* size of the extra VMA area to map in attached pages. 
*/ +#define BLKTAP_VMA_PAGES BLKIF_RING_SIZE + +/* blktap IOCTLs: */ +#define BLKTAP_IOCTL_KICK_FE 1 +#define BLKTAP_IOCTL_KICK_BE 2 +#define BLKTAP_IOCTL_SETMODE 3 +#define BLKTAP_IOCTL_PRINT_IDXS 100 + +/* blktap switching modes: (Set with BLKTAP_IOCTL_SETMODE) */ +#define BLKTAP_MODE_PASSTHROUGH 0x00000000 /* default */ +#define BLKTAP_MODE_INTERCEPT_FE 0x00000001 +#define BLKTAP_MODE_INTERCEPT_BE 0x00000002 +#define BLKTAP_MODE_COPY_FE 0x00000004 +#define BLKTAP_MODE_COPY_BE 0x00000008 +#define BLKTAP_MODE_COPY_FE_PAGES 0x00000010 +#define BLKTAP_MODE_COPY_BE_PAGES 0x00000020 + +#define BLKTAP_MODE_INTERPOSE \ + (BLKTAP_MODE_INTERCEPT_FE | BLKTAP_MODE_INTERCEPT_BE) + +#define BLKTAP_MODE_COPY_BOTH \ + (BLKTAP_MODE_COPY_FE | BLKTAP_MODE_COPY_BE) + +#define BLKTAP_MODE_COPY_BOTH_PAGES \ + (BLKTAP_MODE_COPY_FE_PAGES | BLKTAP_MODE_COPY_BE_PAGES) + +static inline int BLKTAP_MODE_VALID(unsigned long arg) +{ + return ( + ( arg == BLKTAP_MODE_PASSTHROUGH ) || + ( arg == BLKTAP_MODE_INTERCEPT_FE ) || + ( arg == BLKTAP_MODE_INTERCEPT_BE ) || + ( arg == BLKTAP_MODE_INTERPOSE ) || + ( (arg & ~BLKTAP_MODE_COPY_FE_PAGES) == BLKTAP_MODE_COPY_FE ) || + ( (arg & ~BLKTAP_MODE_COPY_BE_PAGES) == BLKTAP_MODE_COPY_BE ) || + ( (arg & ~BLKTAP_MODE_COPY_BOTH_PAGES) == BLKTAP_MODE_COPY_BOTH ) + ); +} + + + + + + + +#define __BLKINT_H__ +#endif diff --git a/tools/blktap/blktaplib.c b/tools/blktap/blktaplib.c new file mode 100644 index 0000000000..2399a20d7a --- /dev/null +++ b/tools/blktap/blktaplib.c @@ -0,0 +1,542 @@ +/* + * blktaplib.c + * + * userspace interface routines for the blktap driver. + * + * (c) 2004 Andrew Warfield. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define __COMPILING_BLKTAP_LIB +#include "blktaplib.h" + +#if 1 +#define DPRINTF(_f, _a...) printf ( _f , ## _a ) +#else +#define DPRINTF(_f, _a...) 
((void)0) +#endif +#define DEBUG_RING_IDXS 1 + +#define POLLRDNORM 0x040 + +#define BLKTAP_IOCTL_KICK 1 + +// this is in the header now +//DEFINE_RING_TYPES(blkif, blkif_request_t, blkif_response_t); + +void got_sig_bus(); +void got_sig_int(); + + +/* in kernel these are opposite, but we are a consumer now. */ +blkif_back_ring_t fe_ring; /* slightly counterintuitive ;) */ +blkif_front_ring_t be_ring; +ctrl_back_ring_t ctrl_ring; + + + +unsigned long mmap_vstart = 0; +char *blktap_mem; +int fd = 0; + +#define BLKTAP_RING_PAGES 3 /* Ctrl, Back, Front */ +/*#define BLKTAP_MMAP_PAGES ((11 + 1) * 64)*/ +#define BLKTAP_MMAP_PAGES \ + ((BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) * BLKIF_RING_SIZE) +#define BLKTAP_MMAP_REGION_SIZE (BLKTAP_RING_PAGES + BLKTAP_MMAP_PAGES) + + + +int bad_count = 0; +void bad(void) +{ + bad_count ++; + if (bad_count > 50) exit(0); +} +/*-----[ ID Manipulation from tap driver code ]--------------------------*/ + +#define ACTIVE_RING_IDX unsigned short + +inline unsigned long MAKE_ID(domid_t fe_dom, ACTIVE_RING_IDX idx) +{ + return ( (fe_dom << 16) | idx ); +} + +inline unsigned int ID_TO_IDX(unsigned long id) +{ + return ( id & 0x0000ffff ); +} + +inline domid_t ID_TO_DOM(unsigned long id) { return (id >> 16); } +/* +static int (*request_hook)(blkif_request_t *req) = NULL; +static int (*response_hook)(blkif_response_t *req) = NULL; +*/ + +/*-----[ Request/Response hook chains.]----------------------------------*/ + +#define HOOK_NAME_MAX 50 + +typedef struct ctrl_hook_st { + char name[HOOK_NAME_MAX]; + int (*func)(control_msg_t *); + struct ctrl_hook_st *next; +} ctrl_hook_t; + +typedef struct request_hook_st { + char name[HOOK_NAME_MAX]; + int (*func)(blkif_request_t *); + struct request_hook_st *next; +} request_hook_t; + +typedef struct response_hook_st { + char name[HOOK_NAME_MAX]; + int (*func)(blkif_response_t *); + struct response_hook_st *next; +} response_hook_t; + +static ctrl_hook_t *ctrl_hook_chain = NULL; +static request_hook_t 
*request_hook_chain = NULL; +static response_hook_t *response_hook_chain = NULL; + +void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)) +{ + ctrl_hook_t *ch_ent, **c; + + ch_ent = (ctrl_hook_t *)malloc(sizeof(ctrl_hook_t)); + if (!ch_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } + + ch_ent->func = ch; + ch_ent->next = NULL; + strncpy(ch_ent->name, name, HOOK_NAME_MAX); + ch_ent->name[HOOK_NAME_MAX-1] = '\0'; + + c = &ctrl_hook_chain; + while (*c != NULL) { + c = &(*c)->next; + } + *c = ch_ent; +} + +void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)) +{ + request_hook_t *rh_ent, **c; + + rh_ent = (request_hook_t *)malloc(sizeof(request_hook_t)); + if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } + + rh_ent->func = rh; + rh_ent->next = NULL; + strncpy(rh_ent->name, name, HOOK_NAME_MAX); + + c = &request_hook_chain; + while (*c != NULL) { + c = &(*c)->next; + } + *c = rh_ent; +} + +void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)) +{ + response_hook_t *rh_ent, **c; + + rh_ent = (response_hook_t *)malloc(sizeof(response_hook_t)); + if (!rh_ent) { printf("couldn't allocate a new hook\n"); exit(-1); } + + rh_ent->func = rh; + rh_ent->next = NULL; + strncpy(rh_ent->name, name, HOOK_NAME_MAX); + + c = &response_hook_chain; + while (*c != NULL) { + c = &(*c)->next; + } + *c = rh_ent; +} + +void print_hooks(void) +{ + request_hook_t *req_hook; + response_hook_t *rsp_hook; + ctrl_hook_t *ctrl_hook; + + printf("Control Hooks:\n"); + ctrl_hook = ctrl_hook_chain; + while (ctrl_hook != NULL) + { + printf(" [0x%p] %s\n", ctrl_hook->func, ctrl_hook->name); + ctrl_hook = ctrl_hook->next; + } + + printf("Request Hooks:\n"); + req_hook = request_hook_chain; + while (req_hook != NULL) + { + printf(" [0x%p] %s\n", req_hook->func, req_hook->name); + req_hook = req_hook->next; + } + + printf("Response Hooks:\n"); + rsp_hook = response_hook_chain; + while (rsp_hook != NULL) + { + 
printf(" [0x%p] %s\n", rsp_hook->func, rsp_hook->name); + rsp_hook = rsp_hook->next; + } +} + +/*-----[ Data to/from Backend (server) VM ]------------------------------*/ + +inline int write_req_to_be_ring(blkif_request_t *req) +{ + blkif_request_t *req_d; + + //req_d = FRONT_RING_NEXT_EMPTY_REQUEST(&be_ring); + req_d = RING_GET_REQUEST(BLKIF_RING, &be_ring, be_ring.req_prod_pvt); + memcpy(req_d, req, sizeof(blkif_request_t)); + wmb(); + be_ring.req_prod_pvt++; + + return 0; +} + +inline int write_rsp_to_fe_ring(blkif_response_t *rsp) +{ + blkif_response_t *rsp_d; + + //rsp_d = BACK_RING_NEXT_EMPTY_RESPONSE(&fe_ring); + rsp_d = RING_GET_RESPONSE(BLKIF_RING, &fe_ring, fe_ring.rsp_prod_pvt); + memcpy(rsp_d, rsp, sizeof(blkif_response_t)); + wmb(); + fe_ring.rsp_prod_pvt++; + + return 0; +} + +static void apply_rsp_hooks(blkif_response_t *rsp) +{ + response_hook_t *rsp_hook; + + rsp_hook = response_hook_chain; + while (rsp_hook != NULL) + { + switch(rsp_hook->func(rsp)) + { + case BLKTAP_PASS: + break; + default: + printf("Only PASS is supported for resp hooks!\n"); + } + rsp_hook = rsp_hook->next; + } +} + +void blktap_inject_response(blkif_response_t *rsp) +{ + apply_rsp_hooks(rsp); + write_rsp_to_fe_ring(rsp); + RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring); + ioctl(fd, BLKTAP_IOCTL_KICK_FE); +} + +/*-----[ Polling fd listeners ]------------------------------------------*/ + +#define MAX_POLLFDS 64 + +typedef struct { + int (*func)(int fd); + struct pollfd *pfd; + int fd; + short events; + int active; +} pollhook_t; + +static struct pollfd pfd[MAX_POLLFDS+1]; +static pollhook_t pollhooks[MAX_POLLFDS]; +static unsigned int ph_freelist[MAX_POLLFDS]; +static unsigned int ph_cons, ph_prod; +#define nr_pollhooks() (MAX_POLLFDS - (ph_prod - ph_cons)) +#define PH_IDX(x) (x % MAX_POLLFDS) + +int blktap_attach_poll(int fd, short events, int (*func)(int fd)) +{ + pollhook_t *ph; + + if (nr_pollhooks() == MAX_POLLFDS) { + printf("Too many pollhooks!\n"); + return -1; + } + + ph = 
&pollhooks[ph_freelist[PH_IDX(ph_cons++)]]; + + ph->func = func; + ph->fd = fd; + ph->events = events; + ph->active = 1; + + printf("Added fd %d at ph index %d, now %d phs.\n", fd, ph_cons-1, + nr_pollhooks()); + + return 0; +} + +void blktap_detach_poll(int fd) +{ + int i; + + for (i=0; ifd == fd)) { + ph_freelist[PH_IDX(ph_prod++)] = i; + pollhooks[i].pfd->fd = -1; + pollhooks[i].active = 0; + break; + } + + printf("Removed fd %d at ph index %d, now %d phs.\n", fd, i, + nr_pollhooks()); +} + +void pollhook_init(void) +{ + int i; + + for (i=0; i < MAX_POLLFDS; i++) { + ph_freelist[i] = (i+1) % MAX_POLLFDS; + pollhooks[i].active = 0; + } + + ph_cons = 0; + ph_prod = MAX_POLLFDS; +} + +void __attribute__ ((constructor)) blktaplib_init(void) +{ + printf("[[ C O N S T R U C T O R ]]\n"); + pollhook_init(); +} + +/*-----[ The main listen loop ]------------------------------------------*/ + +int blktap_listen(void) +{ + int notify_be, notify_fe, tap_pfd; + + /* comms rings: */ + blkif_request_t *req; + blkif_response_t *rsp; + control_msg_t *msg; + blkif_sring_t *sring; + ctrl_sring_t *csring; + RING_IDX rp, i, pfd_count; + + /* handler hooks: */ + request_hook_t *req_hook; + response_hook_t *rsp_hook; + ctrl_hook_t *ctrl_hook; + + signal (SIGBUS, got_sig_bus); + signal (SIGINT, got_sig_int); + + print_hooks(); + + fd = open("/dev/blktap", O_RDWR); + if (fd == -1) { + printf("open failed! (%d)\n", errno); + goto open_failed; + } + + blktap_mem = mmap(0, PAGE_SIZE * BLKTAP_MMAP_REGION_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if ((int)blktap_mem == -1) { + printf("mmap failed! 
(%d)\n", errno); + goto mmap_failed; + } + + /* assign the rings to the mapped memory */ + csring = (ctrl_sring_t *)blktap_mem; + BACK_RING_INIT(CTRL_RING, &ctrl_ring, csring); + + sring = (blkif_sring_t *)((unsigned long)blktap_mem + PAGE_SIZE); + FRONT_RING_INIT(BLKIF_RING, &be_ring, sring); + + sring = (blkif_sring_t *)((unsigned long)blktap_mem + (2 *PAGE_SIZE)); + BACK_RING_INIT(BLKIF_RING, &fe_ring, sring); + + mmap_vstart = (unsigned long)blktap_mem + (BLKTAP_RING_PAGES << PAGE_SHIFT); + + printf("fe_ring mapped at: %p\n", fe_ring.sring); + printf("be_ring mapped at: %p\n", be_ring.sring); + + ioctl(fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE ); + + while(1) { + int ret; + + /* build the poll list */ + + DPRINTF("Building poll list.\n"); + + pfd_count = 0; + for ( i=0; i < MAX_POLLFDS; i++ ) { + pollhook_t *ph = &pollhooks[i]; + + if (ph->active) { + pfd[pfd_count].fd = ph->fd; + pfd[pfd_count].events = ph->events; + ph->pfd = &pfd[pfd_count]; + pfd_count++; + } + } + + tap_pfd = pfd_count; + pfd[tap_pfd].fd = fd; + pfd[tap_pfd].events = POLLIN; + + DPRINTF("poll() %d fds.\n", pfd_count); + + if ( (ret = (poll(pfd, pfd_count+1, 10000)) == 0) ) { + if (DEBUG_RING_IDXS) + ioctl(fd, BLKTAP_IOCTL_PRINT_IDXS); + continue; + } + + DPRINTF("poll returned %d\n", ret); + + for (i=0; i < MAX_POLLFDS; i++) { + if ( (pollhooks[i].active ) && (pollhooks[i].pfd->revents ) ) + pollhooks[i].func(pollhooks[i].pfd->fd); + } + + if (pfd[tap_pfd].revents) { + + /* empty the control ring */ + rp = ctrl_ring.sring->req_prod; + rmb(); + for (i = ctrl_ring.req_cons; i < rp; i++) + { + msg = RING_GET_REQUEST(CTRL_RING, &ctrl_ring, i); + + ctrl_hook = ctrl_hook_chain; + while (ctrl_hook != NULL) + { + DPRINTF("CTRL_HOOK: %s\n", ctrl_hook->name); + /* We currently don't respond to ctrl messages. */ + ctrl_hook->func(msg); + ctrl_hook = ctrl_hook->next; + } + } + /* Using this as a unidirectional ring. 
*/ + ctrl_ring.req_cons = ctrl_ring.rsp_prod_pvt = i; + RING_PUSH_RESPONSES(CTRL_RING, &ctrl_ring); + + /* empty the fe_ring */ + notify_fe = 0; + notify_be = RING_HAS_UNCONSUMED_REQUESTS(BLKIF_RING, &fe_ring); + rp = fe_ring.sring->req_prod; + rmb(); + for (i = fe_ring.req_cons; i != rp; i++) + { + int done = 0; /* stop forwarding this request */ + + req = RING_GET_REQUEST(BLKIF_RING, &fe_ring, i); + + DPRINTF("copying an fe request\n"); + + req_hook = request_hook_chain; + while (req_hook != NULL) + { + DPRINTF("REQ_HOOK: %s\n", req_hook->name); + switch(req_hook->func(req)) + { + case BLKTAP_RESPOND: + apply_rsp_hooks((blkif_response_t *)req); + write_rsp_to_fe_ring((blkif_response_t *)req); + notify_fe = 1; + done = 1; + break; + case BLKTAP_STOLEN: + done = 1; + break; + case BLKTAP_PASS: + break; + default: + printf("Unknown request hook return value!\n"); + } + if (done) break; + req_hook = req_hook->next; + } + + if (done == 0) write_req_to_be_ring(req); + + } + fe_ring.req_cons = i; + + /* empty the be_ring */ + notify_fe |= RING_HAS_UNCONSUMED_RESPONSES(BLKIF_RING, &be_ring); + rp = be_ring.sring->rsp_prod; + rmb(); + for (i = be_ring.rsp_cons; i != rp; i++) + { + + rsp = RING_GET_RESPONSE(BLKIF_RING, &be_ring, i); + + DPRINTF("copying a be request\n"); + + apply_rsp_hooks(rsp); + write_rsp_to_fe_ring(rsp); + } + be_ring.rsp_cons = i; + + /* notify the domains */ + + if (notify_be) { + DPRINTF("notifying be\n"); + RING_PUSH_REQUESTS(BLKIF_RING, &be_ring); + ioctl(fd, BLKTAP_IOCTL_KICK_BE); + } + + if (notify_fe) { + DPRINTF("notifying fe\n"); + RING_PUSH_RESPONSES(BLKIF_RING, &fe_ring); + ioctl(fd, BLKTAP_IOCTL_KICK_FE); + } + } + } + + + munmap(blktap_mem, PAGE_SIZE); + + mmap_failed: + close(fd); + + open_failed: + return 0; +} + +void got_sig_bus() { + printf("Attempted to access a page that isn't.\n"); + exit(-1); +} + +void got_sig_int() { + printf("quitting -- returning to passthrough mode.\n"); + if (fd > 0) ioctl(fd, BLKTAP_IOCTL_SETMODE, 
BLKTAP_MODE_PASSTHROUGH ); + exit(0); +} diff --git a/tools/blktap/blktaplib.h b/tools/blktap/blktaplib.h new file mode 100644 index 0000000000..7b38f565fc --- /dev/null +++ b/tools/blktap/blktaplib.h @@ -0,0 +1,76 @@ +/* blktaplib.h + * + * userland accessors to the block tap. + * + * for the moment this is rather simple. + */ + +#ifndef __BLKTAPLIB_H__ +#define __BLKTAPLIB_H__ + +#include + +typedef uint8_t u8; +typedef uint16_t u16; +typedef uint32_t u32; +typedef uint64_t u64; +typedef int8_t s8; +typedef int16_t s16; +typedef int32_t s32; +typedef int64_t s64; + +#if defined(__i386__) +#define rmb() __asm__ __volatile__ ( "lock; addl $0,0(%%esp)" : : : "memory" ) +#define wmb() __asm__ __volatile__ ( "" : : : "memory" ) +#else +#error "Define barriers" +#endif + +#include +#include +#include +#include +#include +#include "blkint.h" + +#define BLKTAP_PASS 0 /* Keep passing this request as normal. */ +#define BLKTAP_RESPOND 1 /* Request is now a reply. Return it. */ +#define BLKTAP_STOLEN 2 /* Hook has stolen request. 
*/ + +#define domid_t unsigned short + +inline unsigned int ID_TO_IDX(unsigned long id); +inline domid_t ID_TO_DOM(unsigned long id); + +void blktap_register_ctrl_hook(char *name, int (*ch)(control_msg_t *)); +void blktap_register_request_hook(char *name, int (*rh)(blkif_request_t *)); +void blktap_register_response_hook(char *name, int (*rh)(blkif_response_t *)); +void blktap_inject_response(blkif_response_t *); +int blktap_attach_poll(int fd, short events, int (*func)(int)); +void blktap_detach_poll(int fd); +int blktap_listen(void); + +/*-----[ Accessing attached data page mappings ]-------------------------*/ +#define MMAP_PAGES_PER_REQUEST \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST + 1) +#define MMAP_VADDR(_req,_seg) \ + (mmap_vstart + \ + ((_req) * MMAP_PAGES_PER_REQUEST * PAGE_SIZE) + \ + ((_seg) * PAGE_SIZE)) + +extern unsigned long mmap_vstart; + + +/*-----[ Defines that are only used by library clients ]-----------------*/ + +#ifndef __COMPILING_BLKTAP_LIB + +static char *blkif_op_name[] = { + [BLKIF_OP_READ] = "READ", + [BLKIF_OP_WRITE] = "WRITE", + [BLKIF_OP_PROBE] = "PROBE", +}; + +#endif /* __COMPILING_BLKTAP_LIB */ + +#endif /* __BLKTAPLIB_H__ */ diff --git a/tools/blktap/libgnbd/Makefile b/tools/blktap/libgnbd/Makefile new file mode 100644 index 0000000000..4297c02148 --- /dev/null +++ b/tools/blktap/libgnbd/Makefile @@ -0,0 +1,8 @@ + +CFLAGS += -Wall -Werror -g +LDFLAGS += -g + +libgnbd.a: libgnbd.o + $(AR) r $@ $< + +gnbdtest: gnbdtest.o libgnbd.a diff --git a/tools/blktap/libgnbd/gnbdtest.c b/tools/blktap/libgnbd/gnbdtest.c new file mode 100644 index 0000000000..bc391591b0 --- /dev/null +++ b/tools/blktap/libgnbd/gnbdtest.c @@ -0,0 +1,90 @@ + +#include +#include +#include +#include +#include +#include + +#include + +#include "libgnbd.h" + +#define PRINTF(x) printf x +#if 0 +#define DFPRINTF(x...) 
fprintf(stderr, ##x) +#define DPRINTF(x) DFPRINTF x +#else +#define DPRINTF(x) +#endif + +static unsigned char buf1[8 << 9]; +static unsigned char buf2[8 << 9]; +static unsigned char buf3[8 << 9]; + +int +main(int argc, char **argv) +{ + struct gnbd_handle *gh; + struct pollfd pfd[1]; + int err, tout; + + gh = gnbd_setup("panik", 0x38e7, "cl349-nahant-beta2-root1", + "arcadians.cl.cam.ac.uk"); + if (gh == NULL) + errx(1, "gnbd_setup"); + + memset(pfd, 0, sizeof(pfd)); + pfd[0].fd = gnbd_fd(gh); + pfd[0].events = POLLIN; + + while ((tout = poll(pfd, 1, 0)) >= 0) { + if (tout == 0) + continue; + DPRINTF(("event\n")); + if (pfd[0].revents) { + err = gnbd_reply(gh); + pfd[0].events = POLLIN; + switch (err) { + case GNBD_LOGIN_DONE: + DPRINTF(("sectors: %08llu\n", + gnbd_sectors(gh))); + err = gnbd_read(gh, 8, 8, buf2, 1); + if (err) + warnx("gnbd_read"); + err = gnbd_read(gh, 0, 8, buf1, 0); + if (err) + warnx("gnbd_read"); + err = gnbd_read(gh, 16, 8, buf3, 2); + if (err) + warnx("gnbd_read"); + break; + case GNBD_REQUEST_DONE: + DPRINTF(("request done %ld\n", + gnbd_finished_request(gh))); + if (0 && gnbd_finished_request(gh) == 0) { + write(1, buf1, 8 << 9); + err = gnbd_write(gh, 0, 8, buf1, 10); + if (err) + warnx("gnbd_write"); + } + break; + case GNBD_CONTINUE: + DPRINTF(("continue\n")); + break; + case 0: + break; + case GNBD_CONTINUE_WRITE: + DPRINTF(("continue write\n")); + pfd[0].events |= POLLOUT; + break; + default: + warnx("gnbd_reply error"); + break; + } + DPRINTF(("got gnbd reply\n")); + } + } + + return 0; +} diff --git a/tools/blktap/libgnbd/libgnbd.c b/tools/blktap/libgnbd/libgnbd.c new file mode 100644 index 0000000000..2856ca311d --- /dev/null +++ b/tools/blktap/libgnbd/libgnbd.c @@ -0,0 +1,647 @@ +/* libgnbd.c + * + * gnbd client library + * + * Copyright (c) 2005, Christian Limpach + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "libgnbd.h" + +#define 
/*
 * Read from 'fd' into 'buf'.
 *
 * With read_count != NULL: a single read() is issued and the number of bytes
 * obtained is stored in *read_count; returns 0 on success, 1 on read error.
 * With read_count == NULL: exactly 'count' bytes are expected from that
 * single read(); a short or failed read returns EINTR, a full one returns 0.
 */
static int
read_buf(int fd, void *buf, size_t count, size_t *read_count)
{
	ssize_t got;

	got = read(fd, buf, count);
	if (read_count) {
		if (got >= 0)
			*read_count = (size_t)got;
	} else if (got < 0 || (size_t)got != count)
		return EINTR;		/* xxx */
	return got < 0;
}

/*
 * Read one 32-bit big-endian word from 'fd' and store it, in host order,
 * in *val.  Returns 0 on success, non-zero on failure (see read_buf).
 *
 * The wire word is always 4 bytes; a fixed-width temporary is used because
 * sizeof(unsigned long) is 8 on LP64 targets.
 */
static int
read_4(int fd, unsigned long *val)
{
	uint32_t wire;
	int err;

	err = read_buf(fd, &wire, sizeof(wire), NULL);
	if (err == 0)
		*val = ntohl(wire);
	return err;
}

/*
 * Write all 'count' bytes of 'buf' to 'fd', continuing after partial writes
 * and retrying when interrupted by a signal.
 *
 * Returns 0 once everything has been written, 1 on any other write error.
 */
static int
write_buf(int fd, void *buf, size_t count)
{
	const unsigned char *p = buf;
	ssize_t n;

	while (count > 0) {
		n = write(fd, p, count);
		if (n < 0 && errno == EINTR)
			continue;
		if (n <= 0)
			return 1;
		p += n;
		count -= (size_t)n;
	}
	return 0;
}

/*
 * Write 'val' to 'fd' as one 32-bit big-endian word.
 * Returns 0 on success, 1 on write error.
 */
static int
write_4(int fd, unsigned long val)
{
	uint32_t wire;

	wire = htonl((uint32_t)val);
	return write_buf(fd, &wire, sizeof(wire));
}
gh->gh_nodename, sizeof(nr.node_name)); + err = write_buf(gh->gh_fd, &nr, sizeof(nr)); + if (err) { + warnx("send node_req failed"); + goto out; + } + + gh->gh_flags |= GHF_EXPECT_KILL_GSERV_REPLY; + DPRINTF(("gnbd_kill_gserv ok\n")); + + return 0; + out: + socket_shutdown(gh); + return err; +} + +static int +login(struct gnbd_handle *gh) +{ + struct login_req lr; + struct node_req nr; + int err; + uint64_t timestamp; + struct timeval tv; + + DPRINTF(("gnbd_login\n")); + err = socket_connect(gh); + if (err) { + warnx("socket_connect"); + return err; + } + + err = write_4(gh->gh_fd, EXTERN_LOGIN_REQ); + if (err) { + warnx("send EXTERN_LOGIN_REQ failed"); + goto out; + } + + err = gettimeofday(&tv, NULL); + if (err) { + warnx("gettimeofday"); + goto out; + } + timestamp = (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec; + + lr.timestamp = htonll(timestamp); + lr.version = htons(PROTOCOL_VERSION); + strncpy(lr.devname, gh->gh_devname, sizeof(lr.devname)); + err = write_buf(gh->gh_fd, &lr, sizeof(lr)); + if (err) { + warnx("send login_req failed"); + goto out; + } + + strncpy(nr.node_name, gh->gh_nodename, sizeof(nr.node_name)); + err = write_buf(gh->gh_fd, &nr, sizeof(nr)); + if (err) { + warnx("send node_req failed"); + goto out; + } + + gh->gh_flags |= GHF_EXPECT_LOGIN_REPLY; + + DPRINTF(("gnbd_login ok\n")); + return 0; + out: + socket_shutdown(gh); + return err; +} + +static int +kill_gserv_reply(struct gnbd_handle *gh) +{ + unsigned long reply; + int err; + + DPRINTF(("read gnbd_kill_gserv_reply\n")); + err = read_4(gh->gh_fd, &reply); + if (err) { + warnx("read kill_gserv_reply failed"); + return err; + } + + if (reply && reply != ENODEV) { + warnx("kill gserv failed: %s", strerror(reply)); + return reply; + } + + gh->gh_flags &= ~GHF_EXPECT_KILL_GSERV_REPLY; + socket_shutdown(gh); + + err = login(gh); + if (err) + warnx("gnbd_login"); + + return err; +} + +static int +login_reply(struct gnbd_handle *gh) +{ + struct login_reply lr; + int err; + + DPRINTF(("read 
gnbd_login_reply\n")); + err = read_buf(gh->gh_fd, &lr, sizeof(lr), NULL); + if (err) { + warnx("read login_reply failed"); + return err; + } + + if (lr.err) { + if (lr.version) { + warnx("gnbd version mismatch %04x != %04x", + PROTOCOL_VERSION, ntohs(lr.version)); + return EINVAL; + } + warnx("login refused: %s", strerror(lr.err)); + return lr.err; + } + gh->gh_sectors = ntohll(lr.sectors); + + gh->gh_flags &= ~GHF_EXPECT_LOGIN_REPLY; + + return GNBD_LOGIN_DONE; +} + +static int +incoming_request(struct gnbd_handle *gh) +{ + struct gnbd_request *gr = gh->gh_incoming_request; + ssize_t done; + int err; + + DPRINTF(("incoming_request: done %d size %d\n", gr->gr_done, + gr->gr_size)); + err = read_buf(gh->gh_fd, gr->gr_buf + gr->gr_done, + gr->gr_size - gr->gr_done, &done); + if (err) + goto out; + + DPRINTF(("incoming_request: got %d\n", done)); + gr->gr_done += done; + if (gr->gr_done == gr->gr_size) { + gh->gh_flags &= ~GHF_INCOMING_REQUEST; + gh->gh_finished_request = gr->gr_cookie; + free(gr); + return GNBD_REQUEST_DONE; + } + + return GNBD_CONTINUE; + + out: + gh->gh_flags &= ~GHF_INCOMING_REQUEST; + gh->gh_finished_request = 0; + free(gr); + return err; +} + + + +int +gnbd_close(struct gnbd_handle *gh) +{ + int err; + struct gnbd_request **tmp; + + for (tmp = &gh->gh_outstanding_requests; *tmp; tmp = &(*tmp)->gr_next) + free(*tmp); + + if (gh->gh_flags & GHF_INCOMING_REQUEST) + free(gh->gh_incoming_request); + + err = close(gh->gh_fd); + if (err) + warnx("close"); + free(gh); + + return err; +} + +int +gnbd_fd(struct gnbd_handle *gh) +{ + return gh->gh_fd; +} + +unsigned long +gnbd_finished_request(struct gnbd_handle *gh) +{ + return gh->gh_finished_request; +} + +int +gnbd_read(struct gnbd_handle *gh, uint64_t sector, ssize_t count, + unsigned char *buf, unsigned long cookie) +{ + struct gnbd_server_request gsr; + struct gnbd_request *gr; + int err; + + gr = malloc(sizeof(struct gnbd_request)); + if (gr == NULL) + return ENOMEM; + memset(gr, 0, sizeof(gr)); + 
+ gr->gr_buf = buf; + gr->gr_size = count << 9; + gr->gr_done = 0; + gr->gr_cookie = cookie; + + gsr.magic = htonl(GNBD_REQUEST_MAGIC); + gsr.type = htonl(GNBD_CMD_READ); + gsr.from = htonll(sector << 9); + gsr.len = htonl(gr->gr_size); + memset(gsr.handle, 0, sizeof(gsr.handle)); + memcpy(gsr.handle, &gr, sizeof(gr)); + + err = write_buf(gh->gh_fd, &gsr, sizeof(gsr)); + if (err) { + warnx("write_buf"); + goto out; + } + + *gh->gh_outstanding_requests_last = gr; + gh->gh_outstanding_requests_last = &gr->gr_next; + + return 0; + + out: + free(gr); + return err; +} + +int +gnbd_write(struct gnbd_handle *gh, uint64_t sector, ssize_t count, + unsigned char *buf, unsigned long cookie) +{ + struct gnbd_server_request gsr; + struct gnbd_request *gr; + int err; + + gr = malloc(sizeof(struct gnbd_request)); + if (gr == NULL) + return ENOMEM; + memset(gr, 0, sizeof(gr)); + + gr->gr_buf = buf; + gr->gr_size = count << 9; + gr->gr_done = 0; + gr->gr_cookie = cookie; + + gsr.magic = htonl(GNBD_REQUEST_MAGIC); + gsr.type = htonl(GNBD_CMD_WRITE); + gsr.from = htonll(sector << 9); + gsr.len = htonl(gr->gr_size); + memset(gsr.handle, 0, sizeof(gsr.handle)); + memcpy(gsr.handle, &gr, sizeof(gr)); + + err = write_buf(gh->gh_fd, &gsr, sizeof(gsr)); + if (err) { + warnx("write_buf"); + goto out; + } + + /* XXX handle non-blocking socket */ + err = write_buf(gh->gh_fd, buf, gr->gr_size); + if (err) { + warnx("write_buf"); + goto out; + } + gr->gr_done += gr->gr_size; + + *gh->gh_outstanding_requests_last = gr; + gh->gh_outstanding_requests_last = &gr->gr_next; + + DPRINTF(("write done\n")); + + return 0; + + out: + free(gr); + return err; +} + +int +gnbd_reply(struct gnbd_handle *gh) +{ + struct gnbd_server_reply gsr; + struct gnbd_request *gr; + int err; + + DPRINTF(("gnbd_reply flags %x\n", gh->gh_flags)); + if ((gh->gh_flags & GHF_EXPECT_KILL_GSERV_REPLY)) + return kill_gserv_reply(gh); + if ((gh->gh_flags & GHF_EXPECT_LOGIN_REPLY)) + return login_reply(gh); + if ((gh->gh_flags & 
/*
 * Create a handle for device 'devname' exported by 'server':'port',
 * identifying this client as 'nodename', and start the connection sequence
 * by sending the kill-gserv request.  The rest of the handshake is driven
 * by calling gnbd_reply() when the socket becomes readable.
 *
 * devname and nodename are truncated if they do not fit the handle's
 * fixed-size fields; the stored copies are always NUL-terminated.
 *
 * Returns the new handle, or NULL on allocation failure, name resolution
 * failure, absence of an IPv4 stream address, or failure to send the
 * initial request.
 */
struct gnbd_handle *
gnbd_setup(char *server, unsigned int port, char *devname, char *nodename)
{
	struct gnbd_handle *gh;
	struct addrinfo *res = NULL, *ai;
	int err;

	/*
	 * Zero the whole handle, not just sizeof(pointer) bytes of it:
	 * gh_flags, the request list head and gh_sin must start out clear.
	 */
	gh = calloc(1, sizeof(*gh));
	if (gh == NULL)
		return NULL;
	gh->gh_fd = -1;
	gh->gh_outstanding_requests_last = &gh->gh_outstanding_requests;

	/* Copy at most size-1 bytes; the zeroed last byte terminates. */
	strncpy(gh->gh_devname, devname, sizeof(gh->gh_devname) - 1);
	strncpy(gh->gh_nodename, nodename, sizeof(gh->gh_nodename) - 1);

	err = getaddrinfo(server, NULL, NULL, &res);
	if (err) {
		if (err != EAI_SYSTEM)
			warnx("getaddrinfo: %s", gai_strerror(err));
		else
			warn("getaddrinfo: %s", gai_strerror(err));
		/* Nothing was allocated for us on failure. */
		res = NULL;
		goto out;
	}

	/* Pick the first IPv4 stream address. */
	for (ai = res; ai; ai = ai->ai_next) {
		if (ai->ai_socktype != SOCK_STREAM)
			continue;
		if (ai->ai_family == AF_INET)
			break;
	}

	if (ai == NULL)
		goto out;

	gh->gh_sin.sin_family = ai->ai_family;
	gh->gh_sin.sin_port = htons(port);
	memcpy(&gh->gh_sin.sin_addr,
	    &((struct sockaddr_in *)ai->ai_addr)->sin_addr,
	    sizeof(gh->gh_sin.sin_addr));

	err = kill_gserv(gh);
	if (err) {
		warnx("gnbd_kill_gserv");
		goto out;
	}

	freeaddrinfo(res);
	return gh;
 out:
	free(gh);
	if (res != NULL)
		freeaddrinfo(res);
	return NULL;
}
+ if self.type == 'amorfs': + self.node = node + self.device = 61440 # (240,0) + self.start_sector = long(self.params) + self.nr_sectors = long(0) + return + # done. + mounted_mode = check_mounted(self, node) if not '!' in self.mode and mounted_mode: if mounted_mode is "w": diff --git a/xen/include/public/io/blkif.h b/xen/include/public/io/blkif.h index 4108f4e545..35b1b78f84 100644 --- a/xen/include/public/io/blkif.h +++ b/xen/include/public/io/blkif.h @@ -9,7 +9,7 @@ #ifndef __XEN_PUBLIC_IO_BLKIF_H__ #define __XEN_PUBLIC_IO_BLKIF_H__ -#include +#include "ring.h" #define blkif_vdev_t u16 #define blkif_sector_t u64 -- 2.30.2